# NOTE: the workspace is deliberately not wiped with rm(list = ls()) here.
# Clearing the global environment destroys whatever the caller had loaded,
# yet leaves attached packages and options untouched, so it does not provide
# a clean slate anyway. Run this script in a fresh R session instead.

# load packages
library(ggplot2)

# load data set on police journals; the columns used below are `union`,
# `year` and `section`.
# stringsAsFactors = FALSE keeps the code columns as character vectors on
# R < 4.0 as well; the in-place string recoding of `section` and `union`
# further down would otherwise produce NAs on factor columns.
dat <- read.csv("police_metadata.csv", stringsAsFactors = FALSE)

# helper column of ones: summing it within a group counts that group's rows
dat[["one"]] <- 1

# row count per union x year x section combination, stored in column `one`
agg <- aggregate(
  one ~ union + year + section,
  data = dat,
  FUN = sum
)

# section code -> display name used in the plot
# ("DP" and "DE" are both shown as the main issue)
section_labels <- c(
  BE = "Berlin",
  BG = "Brandenburg",
  BK = "Federal Criminal Police",
  BN = "Bremen",
  BP = "Federal Police",
  BW = "Baden-Wuerttemberg",
  BY = "Bavaria",
  DP = "Main issue",
  HE = "Hessen",
  HH = "Hamburg",
  MV = "Mecklenburg-Vorpommern",
  NI = "Niedersachsen",
  NW = "North-Rhine Westphalia",
  DE = "Main issue",
  RP = "Rheinland-Pfalz",
  SD = "Saarland",
  SH = "Schleswig-Holstein",
  SN = "Saxony",
  ST = "Saxony-Anhalt",
  TH = "Thuringia"
)

# translate only the codes that have an entry; any other value is left as is
# (and therefore becomes NA in the factor() call below)
has_label <- agg$section %in% names(section_labels)
agg$section[has_label] <- unname(section_labels[agg$section[has_label]])

# factor levels: main issue first, then the sections in display order.
# The order is reversed because the plot puts `section` on a flipped axis,
# where the first level is drawn at the bottom.
section_order <- c(
  "Main issue",
  "Baden-Wuerttemberg",
  "Bavaria",
  "Berlin",
  "Brandenburg",
  "Bremen",
  "Federal Police",
  "Federal Criminal Police",
  "Hamburg",
  "Hessen",
  "Mecklenburg-Vorpommern",
  "Niedersachsen",
  "North-Rhine Westphalia",
  "Rheinland-Pfalz",
  "Saarland",
  "Saxony",
  "Saxony-Anhalt",
  "Schleswig-Holstein",
  "Thuringia"
)
agg$section <- factor(agg$section, levels = rev(section_order))

# make sure year is numeric (it is used on a continuous axis in the plot)
agg[["year"]] <- as.numeric(agg[["year"]])

# union variable: replace the lower-case codes with the display spelling
agg$union <- replace(agg$union, agg$union == "dpolg", "DPolG")
agg$union <- replace(agg$union, agg$union == "gdp", "GdP")


# share of available issues per row: the count in `one` divided by 12 for GdP
# and by 10 for DPolG (presumably the number of issues each union publishes
# per year -- confirm). Rows of any other union stay NA.
is_gdp <- agg$union == "GdP"
is_dpolg <- agg$union == "DPolG"

share <- rep(NA_real_, nrow(agg))
share[is_gdp] <- agg$one[is_gdp] / 12
share[is_dpolg] <- agg$one[is_dpolg] / 10
agg$pct_issues <- share

# bin the share into three ordered categories used for colouring:
#   "a": above 0 and below one half
#   "b": at least one half and below 1
#   "c": exactly 1
# Anything else (e.g. a share above 1) stays NA.
coverage <- rep(NA_character_, nrow(agg))
coverage[share > 0 & share < .5] <- "a"
coverage[share >= .5 & share < 1] <- "b"
coverage[share == 1] <- "c"
agg$pct_issues_cat <- coverage


# plot coverage: one point per section and year, coloured by the coverage
# category in `pct_issues_cat`, with one panel per union.
# `section` is mapped to x and the coordinates are flipped, so sections run
# along the vertical axis and years along the horizontal one.
coverage_plot <- ggplot(
  data = agg,
  aes(x = section, y = year, color = pct_issues_cat)
) +
  coord_flip() +
  geom_point() +
  theme_classic() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 15),
    axis.text.x = element_text(angle = 90)
  ) +
  scale_color_brewer(
    palette = "YlOrRd",
    type = "seq",
    # The limits are padded with dummy levels "A".."F" so that the three real
    # categories sit at positions 4, 6 and 9 of a nine-level scale, i.e. they
    # take the darker, better separated colours of the palette. `breaks`
    # restricts the legend to the real categories.
    limits = c(
      LETTERS[1:3], letters[1], LETTERS[4], letters[2], LETTERS[5:6],
      letters[3]
    ),
    breaks = letters[1:3],
    # Category "b" includes exactly 50% (pct_issues >= .5), so its label
    # reads ">=50" rather than ">50". ASCII is used to stay safe with the
    # pdf device.
    labels = c(">0 & <50", ">=50 & <100", "100"),
    name = "% Issues in Sample (by Year)",
    guide = guide_legend(title.position = "top")
  ) +
  xlab("Jurisdiction") +
  ylab("Year") +
  # years outside 2000-2021 are dropped from the plot by these limits
  scale_y_continuous(limits = c(2000, 2021), breaks = seq(2000, 2020, 5)) +
  facet_wrap(~union)

# show the plot on the active device when run at top level
coverage_plot

# pass the plot explicitly instead of relying on ggplot2's last_plot()
ggsave("FigE2.pdf", plot = coverage_plot, width = 7, height = 5)
